EWAS for MDD PRS


Methods

Subjects

From Generation Scotland,

  • Wave 1: N = 4977

  • Wave 2: N = 4312

DNA methylation data and technical covariates

Processed by Rosie, using EPIC array. M values.

Relatedness controlled for by regressing against GRM. Other regressors include batch and cell proportion.

Additional covariates

Ever smoke, pack years, age, sex and 20 methylation PCs.

Pipeline

From Mark, wiki.


EWAS Results

PRS pT=5e-8

PRS pT=5e-8 no MHC region

PRS pT=0.000001

PRS pT=0.0001

PRS pT=0.01

PRS pT=0.1

PRS pT=1

Summary table

Number of significant CpGs associated with PRS

CpGs to nearest genome-wide significant locus

95% CI for distance to the nearest MDD risk locus:

  • Significant CpG: 36-3.14495510^{5}
  • Non-significant CpG: 1.16545510{5}-6.039677910{7}

Gene annotation

Pathway analysis

GO.result = read.table(here::here('MR_meth_MDD/result/EWAS_MDDprs_fromKathryn/meta-pgrs-mdd/mdd_SNPCH_prs_5e_08/mdd_SNPCH_prs_5e_08.GO.txt'),header=T,stringsAsFactors=F)
KEGG.result = read.table(here::here('MR_meth_MDD/result/EWAS_MDDprs_fromKathryn/meta-pgrs-mdd/mdd_SNPCH_prs_5e_08/mdd_SNPCH_prs_5e_08.KEGG.txt'),header=T,stringsAsFactors=F)

GO.result.sig=GO.result[order(GO.result$FDR,decreasing=F),] %>%
  filter(.,FDR<0.05)
KEGG.result.sig=KEGG.result[order(KEGG.result$FDR,decreasing=F),] %>%
  filter(.,FDR<0.05)

# pathway analysis for non-MHC probes
pathway.dat=fig.dat.GS.pT.5e_08 %>%
      mutate(is.MHC = if_else(MAPINFO>25000000&MAPINFO<35000000&CHR==6,'Yes','No')) %>%
      filter(.,is.MHC == 'No')
GO.result.sig_noMHC <- 
  gometh(sig.cpg=pathway.dat$CpG[pathway.dat$p.adj<0.05], 
         all.cpg=pathway.dat$CpG, collection="GO",
              array.type = "EPIC") %>%
  .[order(.$P.DE,decreasing=F),] 

KEGG.result.sig_noMHC <- 
  gometh(sig.cpg=pathway.dat$CpG[pathway.dat$p.adj<0.05], 
         all.cpg=pathway.dat$CpG, collection="KEGG",
              array.type = "EPIC") %>%
  .[order(.$FDR,decreasing=F),] 

GO results - all CpG

ID ONTOLOGY TERM N DE P.DE FDR
GO:0002476 BP antigen processing and presentation of endogenous peptide antigen via MHC class Ib 8 8 0 0
GO:0002484 BP antigen processing and presentation of endogenous peptide antigen via MHC class I via ER pathway 7 7 0 0
GO:0002486 BP antigen processing and presentation of endogenous peptide antigen via MHC class I via ER pathway, TAP-independent 7 7 0 0
GO:0042611 CC MHC protein complex 23 17 0 0
GO:0071556 CC integral component of lumenal side of endoplasmic reticulum membrane 26 15 0 0
GO:0098553 CC lumenal side of endoplasmic reticulum membrane 26 15 0 0
GO:0098576 CC lumenal side of membrane 31 15 0 0
GO:0042605 MF peptide antigen binding 23 13 0 0
GO:0002478 BP antigen processing and presentation of exogenous peptide antigen 164 22 0 0
GO:0019884 BP antigen processing and presentation of exogenous antigen 171 22 0 0
GO:0048002 BP antigen processing and presentation of peptide antigen 177 22 0 0
GO:0042613 CC MHC class II protein complex 14 10 0 0
GO:0060333 BP interferon-gamma-mediated signaling pathway 86 17 0 0
GO:0019882 BP antigen processing and presentation 212 22 0 0
GO:0012507 CC ER to Golgi transport vesicle membrane 58 14 0 0
GO:0003823 MF antigen binding 52 13 0 0
GO:0002250 BP adaptive immune response 393 26 0 0
GO:0030176 CC integral component of endoplasmic reticulum membrane 145 18 0 0
GO:0002428 BP antigen processing and presentation of peptide antigen via MHC class Ib 9 8 0 0
GO:0031227 CC intrinsic component of endoplasmic reticulum membrane 153 18 0 0

KEGG results - all CpG

ID Description N DE P.DE FDR
path:hsa04612 Antigen processing and presentation 66 20.0 0e+00 0.0e+00
path:hsa05330 Allograft rejection 34 15.0 0e+00 0.0e+00
path:hsa04940 Type I diabetes mellitus 41 16.0 0e+00 0.0e+00
path:hsa05332 Graft-versus-host disease 37 15.0 0e+00 0.0e+00
path:hsa05320 Autoimmune thyroid disease 46 15.0 0e+00 0.0e+00
path:hsa05416 Viral myocarditis 55 15.0 0e+00 0.0e+00
path:hsa04145 Phagosome 140 18.0 0e+00 0.0e+00
path:hsa05310 Asthma 27 9.0 0e+00 0.0e+00
path:hsa05322 Systemic lupus erythematosus 112 13.5 0e+00 0.0e+00
path:hsa04514 Cell adhesion molecules 135 16.0 0e+00 0.0e+00
path:hsa05150 Staphylococcus aureus infection 82 11.0 0e+00 0.0e+00
path:hsa05169 Epstein-Barr virus infection 193 17.0 0e+00 0.0e+00
path:hsa05166 Human T-cell leukemia virus 1 infection 211 18.5 0e+00 0.0e+00
path:hsa04672 Intestinal immune network for IgA production 43 9.0 0e+00 0.0e+00
path:hsa05168 Herpes simplex virus 1 infection 465 20.0 0e+00 1.0e-07
path:hsa05145 Toxoplasmosis 106 12.0 0e+00 2.0e-07
path:hsa05321 Inflammatory bowel disease 61 9.0 0e+00 4.0e-07
path:hsa05140 Leishmaniasis 68 9.0 0e+00 6.0e-07
path:hsa05323 Rheumatoid arthritis 87 9.0 2e-07 3.0e-06
path:hsa04640 Hematopoietic cell lineage 90 9.0 3e-07 4.6e-06

GO results - no MHC

ONTOLOGY TERM N DE P.DE FDR
GO:0060629 BP regulation of homologous chromosome segregation 2 1 0.0016177 1
GO:0006599 BP phosphagen metabolic process 4 1 0.0019652 1
GO:0006603 BP phosphocreatine metabolic process 4 1 0.0019652 1
GO:0042396 BP phosphagen biosynthetic process 4 1 0.0019652 1
GO:0046314 BP phosphocreatine biosynthetic process 4 1 0.0019652 1
GO:0003050 BP regulation of systemic arterial blood pressure by atrial natriuretic peptide 2 1 0.0032532 1
GO:0021691 BP cerebellar Purkinje cell layer maturation 2 1 0.0032875 1
GO:0006600 BP creatine metabolic process 6 1 0.0034783 1
GO:0021590 BP cerebellum maturation 3 1 0.0040527 1
GO:0021699 BP cerebellar cortex maturation 3 1 0.0040527 1
GO:0004111 MF creatine kinase activity 5 1 0.0043818 1
GO:0030644 BP cellular chloride ion homeostasis 4 1 0.0048479 1
GO:0016775 MF phosphotransferase activity, nitrogenous group as acceptor 6 1 0.0050180 1
GO:0042138 BP meiotic DNA double-strand break formation 7 1 0.0054297 1
GO:0035617 BP stress granule disassembly 5 1 0.0054826 1
GO:0001093 MF TFIIB-class transcription factor binding 3 1 0.0054879 1
GO:0051983 BP regulation of chromosome segregation 97 2 0.0056624 1
GO:1902412 BP regulation of mitotic cytokinesis 6 1 0.0059002 1
GO:0070369 CC beta-catenin-TCF7L2 complex 3 1 0.0062280 1
GO:0060631 BP regulation of meiosis I 8 1 0.0063037 1

KEGG results - no MHC

Description N DE P.DE FDR
path:hsa00010 Glycolysis / Gluconeogenesis 64 0 1 1
path:hsa00020 Citrate cycle (TCA cycle) 28 0 1 1
path:hsa00030 Pentose phosphate pathway 25 0 1 1
path:hsa00040 Pentose and glucuronate interconversions 32 0 1 1
path:hsa00051 Fructose and mannose metabolism 32 0 1 1
path:hsa00052 Galactose metabolism 29 0 1 1
path:hsa00053 Ascorbate and aldarate metabolism 27 0 1 1
path:hsa00061 Fatty acid biosynthesis 15 0 1 1
path:hsa00062 Fatty acid elongation 26 0 1 1
path:hsa00071 Fatty acid degradation 42 0 1 1
path:hsa00072 Synthesis and degradation of ketone bodies 9 0 1 1
path:hsa00100 Steroid biosynthesis 18 0 1 1
path:hsa00120 Primary bile acid biosynthesis 17 0 1 1
path:hsa00130 Ubiquinone and other terpenoid-quinone biosynthesis 11 0 1 1
path:hsa00140 Steroid hormone biosynthesis 56 0 1 1
path:hsa00190 Oxidative phosphorylation 115 0 1 1
path:hsa00220 Arginine biosynthesis 20 0 1 1
path:hsa00230 Purine metabolism 125 0 1 1
path:hsa00232 Caffeine metabolism 5 0 1 1
path:hsa00240 Pyrimidine metabolism 55 0 1 1

Summary of EWAS for MDD PRS pT = 5e-8

# 1. No. of significant CpGs in the MHC region
ewas.pTwgsig = ewas.pTwgsig %>%
  mutate(is.MHC = if_else(MAPINFO>25000000&MAPINFO<35000000&CHR==6,'Yes','No'))
no.MHC.sig = table(ewas.pTwgsig$is.MHC[ewas.pTwgsig$p.adj<0.05])
no.MHC.sig
## 
##  No Yes 
##  37 688
# 2. Top 10 CpGs
top.10.sig = ewas.pTwgsig %>%
  filter(.,p.adj<0.05) %>%
  .[order(.$P.value,decreasing = F),] 
top.10.sig[1:10,] %>%
    kbl() %>%
  kable_material(c("striped", "hover"))
CpG Allele1 Allele2 Effect StdErr P.value Direction HetISq HetChiSq HetDf HetPVal CHR MAPINFO p.adj distance_to_nearest_gwashit EWAS.sig is.MHC
84 cg03270340 NA NA 55.5951 2.4688 0 ++ 97.6 41.064 1 0 6 28891204 0 3551 yes Yes
29 cg00903577 NA NA 63.8098 2.8351 0 ++ 99.1 106.379 1 0 6 28831109 0 3099 yes Yes
354 cg12914966 NA NA 79.4726 3.6703 0 ++ 99.4 171.532 1 0 6 28830789 0 2779 yes Yes
470 cg16677399 NA NA 51.7185 2.4974 0 ++ 99.5 212.566 1 0 6 28830902 0 2892 yes Yes
389 cg14345882 NA NA -51.2371 2.5431 0 – 99.3 148.961 1 0 6 26364793 0 137 yes Yes
182 cg06608359 NA NA 59.9925 3.0263 0 ++ 97.1 34.908 1 0 6 28831300 0 3290 yes Yes
678 cg25643361 NA NA -44.1782 2.3263 0 – 98.1 53.559 1 0 6 26361389 0 41 yes Yes
721 cg27543291 NA NA -55.5490 2.9654 0 – 99.3 148.203 1 0 6 26367644 0 10 yes Yes
282 cg10046620 NA NA -41.8334 2.2423 0 – 98.5 66.796 1 0 6 27775042 0 14 yes Yes
233 cg08168669 NA NA -38.2512 2.0864 0 – 99.4 172.357 1 0 6 26367580 0 74 yes Yes
top10.max.p=max(top.10.sig$p.adj[1:10])

# 3. significant CpGs outside of the MHC region
top.10.sig.noMHC = ewas.pTwgsig %>%
  filter(.,p.adj<0.05) %>%
  filter(.,is.MHC=='No') %>%
  .[order(.$P.value,decreasing = F),] 
summary(top.10.sig.noMHC$p.adj)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.000e+00 6.480e-06 1.515e-03 9.472e-03 7.844e-03 4.836e-02
# 4. Top 10 non-MHC CpGs
top.10.sig.noMHC[1:10,] %>%
    kbl() %>%
  kable_material(c("striped", "hover"))
CpG Allele1 Allele2 Effect StdErr P.value Direction HetISq HetChiSq HetDf HetPVal CHR MAPINFO p.adj distance_to_nearest_gwashit EWAS.sig is.MHC
7 cg05590274 NA NA 16.9936 1.9302 0 ++ 94.4 17.997 1 0.0000221 11 113262625 0.0e+00 15822 yes No
26 cg17841099 NA NA -12.1025 1.5366 0 – 97.9 48.399 1 0.0000000 1 153940090 0.0e+00 21962506 yes No
35 cg26200795 NA NA 18.7156 2.4494 0 ++ 91.3 11.449 1 0.0007152 18 52895482 0.0e+00 5322 yes No
8 cg06276712 NA NA -12.4238 1.6339 0 – 96.6 29.631 1 0.0000001 7 12107011 0.0e+00 140319 yes No
27 cg17862947 NA NA 8.2523 1.1073 0 ++ 99.6 285.470 1 0.0000000 12 133086926 1.0e-07 11712199 yes No
6 cg05328885 NA NA 30.1986 4.1802 0 ++ 95.9 24.148 1 0.0000009 11 30943623 4.0e-07 1541 yes No
5 cg04317648 NA NA 11.9459 1.6968 0 ++ 86.6 7.456 1 0.0063220 1 8485376 1.5e-06 553 yes No
4 cg02403154 NA NA -14.5513 2.0861 0 – 0.0 0.386 1 0.5345000 18 52989223 2.3e-06 17013 yes No
22 cg14159747 NA NA -17.4837 2.5178 0 – 93.7 15.880 1 0.0000675 11 113255604 2.9e-06 22843 yes No
15 cg10154826 NA NA 36.6969 5.3737 0 ++ 96.1 25.769 1 0.0000004 6 17600994 6.5e-06 572321 yes No
top.10.sig.noMHC.sCHR=top.10.sig.noMHC[1:10,] %>%
  .[order(.$CHR),]
top10.noMHC.max.p=max(top.10.sig.noMHC$p.adj[1:10])


EWAS replication in LBC

Methods

Subjects

  • From LBC: N = 1330

Data

  • DNA methylation: Processed by Riccardo. 450k array. M-values.

  • Genetic: HRCv1.1 imputed data

  • LD reference: 1000G CEU sample, phase 3, hg19 genome build

Covariates

Smoking status, age, sex, 20 methylation PCs, and cell proportions.

Pipeline

From Mark, wiki.

Local copy (adapted to LBC data): /exports/igmm/eddie/GenScotDepression/shen/Tools/ewas_pipeline/

Results

Manhattan Plot

## Warning: Removed 30820 rows containing missing values (geom_point).

## Warning: Removed 30820 rows containing missing values (geom_point).

Correlation of beta for significant CpGs found in GS

## `geom_smooth()` using formula 'y ~ x'

Summry of replication analysis: on CpGs significant in GS (nCpG = 620)

  • Correlation between betas in GS and LBC for all CpGs: r = 0.784

  • Correlation between betas in GS and LBC for CpGs not in MHC region: r = 0.771

  • Betas remained in the same direction in LBC: 86%

  • CpGs nominally significant in LBC: 33.7%

Replication EWAS for other PRS (other pT)

Figures saved as: MR_meth_MDD/Figs/LBC_EWAS_pplot*

Number of significant CpGs associated with PRS


SNP to CpG mapping


## png 
##   2

##### Calculate SNP-CpG distance
# Define function
cal_distance <- function(tmp.ids,ref.anno.snp,ref.anno.cpg){
  snp.id = as.character(tmp.ids['SNP'])
  cpg.id = as.character(tmp.ids['CpG'])
  
  chr.snp = ref.anno.snp$CHR[ref.anno.snp$RSID==snp.id]
  chr.cpg = ref.anno.cpg$CHR[ref.anno.cpg$ID==cpg.id]
  bp.snp = ref.anno.snp$BP[ref.anno.snp$RSID==snp.id]
  bp.cpg = ref.anno.cpg$MAPINFO[ref.anno.cpg$ID==cpg.id]
  
  if(chr.snp!=chr.cpg){tmp.distance=-999}else{
    tmp.distance = abs(bp.snp-bp.cpg)
  }
  return(tmp.distance)
}

# Reload data
load(here::here('MR_meth_MDD/result/EWAS_MDDprs_fromKathryn/cpg_snp_mapping_pNbeta.RData'))

ls.CpG = ls.CpG %>%
  mutate(.,CHR=gsub('chr','',CHR)) %>%
  mutate(.,CHR=as.numeric(CHR)) %>%
  .[order(.$CHR,decreasing = F),] %>%
  mutate(ID=as.character(ID))

colnames(ls.SNP.102)=c('CHR','BP','RSID')
ls.SNP.102 = ls.SNP.102 %>%
  mutate(.,CHR=gsub('chr','',CHR)) %>%
  mutate(.,CHR=as.numeric(CHR)) %>%
  .[order(.$CHR,decreasing = F),] %>%
  mutate(RSID=as.character(RSID)) %>%
  mutate(is.MHC = if_else(BP>25000000&BP<35000000&CHR==6,'Yes','No'))

p.mat = p.mat %>% column_to_rownames(var="CpG") 
colnames(p.mat)=strsplit(colnames(p.mat),split = '_') %>% unlist %>% .[grep('^rs',.)]
p.mat[p.mat==0]=1e-320  # Recover extremely low p-values to system threshold
p.mat[p.mat>0.05/102]=NA
p.mat.new = -log10(p.mat) %>%
  .[ls.CpG$ID,] %>%
  .[,ls.SNP.102$RSID]

# Transform matrix to long-format data
ids.input = expand.grid(dimnames(p.mat),stringsAsFactors = F)
colnames(ids.input) = c('CpG','SNP')
# Calc distance
distance.long = pbapply(ids.input,FUN=cal_distance,MARGIN = 1,
                      ref.anno.snp=ls.SNP.102,ref.anno.cpg=ls.CpG)
distance.long = data.frame(ids.input,distance.long)
# Transform results back to matrix
distance.mat = matrix(distance.long[,3],ncol=102,byrow = F)
colnames(distance.mat)=colnames(p.mat)
rownames(distance.mat)=rownames(p.mat)
distance.mat[distance.mat==-999]=NA

distance.mat.clean = distance.mat
distance.mat.clean[is.na(p.mat)]=NA

##### Summarise distance
# N sig CpG
colSums(!is.na(p.mat))[ls.SNP.102$RSID[ls.SNP.102$is.MHC=='Yes']]
## rs13198716  rs2393962  rs2232423  rs2232422  rs1265062 rs28732100   rs537160 
##        689        523        689        430        668        444        643
colSums(!is.na(p.mat))[ls.SNP.102$RSID[ls.SNP.102$is.MHC=='No']]
##   rs301806  rs1002656 rs11579246  rs2842586 rs10789214  rs2568958  rs2841188 
##          4          1          3          0          0          2          2 
##  rs4571923  rs2389024 rs10913112 rs11578222 rs12134279  rs4589792  rs1568452 
##          1          0          0          1          0          0          0 
##  rs1518812  rs2697368  rs2111592  rs7622843  rs7617480   rs815710  rs6774461 
##          0          0          0          0          1          0          1 
##   rs362331  rs7697558  rs1022079  rs6837598 rs10471540  rs2582045  rs6882046 
##          0          0          1          0          0          1          1 
## rs13167027   rs161645 rs10866752  rs4464770 rs12530388  rs1396898  rs6933618 
##          0          0          0          0          0          0          0 
##  rs3823624  rs6966915  rs2522831 rs17157433 rs11561993  rs7807677  rs4350019 
##          2          3          0          0          0          0          0 
##  rs7044150 rs10959573 rs10809510  rs3793577  rs1332437 rs13291610  rs3824344 
##          1          0          1          0          0          0          0 
## rs10973229 rs10759879   rs913930   rs549779 rs11818524 rs17766570  rs1021362 
##          0          0          0          0          0          0          0 
## rs11031225  rs7942007  rs1783541 rs10501403  rs7932640  rs1554929  rs6589377 
##          1          1          0          1          0          2          2 
##  rs2187490  rs7978008  rs3883901  rs2056443  rs1343607  rs9592461  rs9531043 
##          0          0          0          0          0          0          0 
##  rs4772087  rs8013655  rs1952039     rs7229  rs1045430   rs942866   rs657586 
##          0          0          0          0          1          2          0 
##  rs8040191   rs716508 rs11077203  rs8061005 rs11642229  rs3091404 rs11643192 
##          0          0          0          0          0          0          0 
##  rs1539853  rs7232543  rs1833288  rs4598994  rs1261086  rs7231748   rs784254 
##          0          1          2          2          2          2          2 
##  rs7236339  rs2072971     rs9074  rs5995992 
##          7          0          2          4
# Distance
distance.summary.trans = 
  colSums(distance.mat.clean>1000000,na.rm =T)[ls.SNP.102$RSID[ls.SNP.102$is.MHC=='No']]/
  colSums(!is.na(distance.mat.clean))[ls.SNP.102$RSID[ls.SNP.102$is.MHC=='No']]
distance.summary.trans = distance.summary.trans[!is.na(distance.mat.clean)>0]

Mendelian Randomisation: CpG -> Brain cortical structure


Methods

Datasets

  • Exposure: mQTL data from GoDMC (N = 20,396). Cohorts that overlap with PGC29 were removed.

  • Outcome: Cortical thickness and cortical surface area (UKB, N~=40k)

Instruments

  • In the EWAS of MDD PRS (pT = 5e-8), a total of 725 CpGs were significant after Bonferroni correction.

  • 615 had more than 30 genetic instruments available.

  • 21 CpGs left after ‘clumping’ (window = 3Mb, r=0.1) in the MHC region.

  • Finally, 20 CpGs with >=5 genetic instruments after data harmonisation entered MR.

MR methods

Results

25 CpGs tested:

Results: DNAm -> Surface area

figs.MR$GMsa

Results: DNAm -> Cortical thickness

figs.MR$GMthk

Results: DNAm -> Cortical volume

figs.MR$GMvol

Results: DNAm -> gFA

figs.MR$WMgFA

Results: DNAm -> gMD

figs.MR$WMgMD

Results: DNAm -> gFA - TR

figs.MR$WMgFA_TR

Results: DNAm -> gMD - TR

figs.MR$WMgMD_TR

Results: DNAm -> FA - ATR

figs.MR$WMFA_ATR

Results: DNAm -> MD - ATR

figs.MR$WMMD_ATR

Results: DNAm -> brain measures (IVW)

reorg_fornemat <- function(tmp.res,targetmethod,tmp.trait,pval_col){
  new.res = tmp.res[[tmp.trait]] %>%
    mutate(.,outcome=tmp.trait,p.adj=p.adjust(get(pval_col),method='bonferroni')) %>%
    filter(.,method==targetmethod) 
}

ivw.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='Inverse variance weighted',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval)) 

egger.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='MR Egger',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval)) %>%
  mutate(p.adj=pval)

wm.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='Weighted median',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval)) 

ivw.res$outcome = recode(ivw.res$outcome,
                         GMsa = 'Surface area',
                         GMthk = 'Cortical thickness',
                         GMvol = 'Cortical volume',
                         WMgMD = 'global MD',
                         WMgFA_TR = 'gFA in\nthalamic radiation',
                         WMgMD_TR = 'gFA in\nthalamic radiation',
                         WMFA_ATR = 'FA in anterior\nthalamic radiation',
                         WMMD_ATR = 'MD in anterior\nthalamic radiation')

egger.res$outcome = recode(egger.res$outcome,
                         GMsa = 'Surface area',
                         GMthk = 'Cortical thickness',
                         GMvol = 'Cortical volume',
                         WMgMD = 'global MD',
                         WMgFA_TR = 'gFA in\nthalamic radiation',
                         WMgMD_TR = 'gFA in\nthalamic radiation',
                         WMFA_ATR = 'FA in anterior\nthalamic radiation',
                         WMMD_ATR = 'MD in anterior\nthalamic radiation')

wm.res$outcome = recode(wm.res$outcome,
                         GMsa = 'Surface area',
                         GMthk = 'Cortical thickness',
                         GMvol = 'Cortical volume',
                         WMgMD = 'global MD',
                         WMgFA_TR = 'gFA in\nthalamic radiation',
                         WMgMD_TR = 'gFA in\nthalamic radiation',
                         WMFA_ATR = 'FA in anterior\nthalamic radiation',
                         WMMD_ATR = 'MD in anterior\nthalamic radiation')


# Prep data for graph 

fig.1=circular_graph_mr(target.res = ivw.res)
fig.2=circular_graph_mr(target.res = egger.res)
fig.3=circular_graph_mr(target.res = wm.res)

fig.total = ggarrange(fig.1,fig.2,fig.3,ncol=3,
                      labels = c('IVW','MR Egger','Weighted-median'),common.legend = T)

ggsave(fig.total,device = 'png',height=15,width = 40,units = 'cm',dpi=300,
       filename = here::here('MR_meth_MDD/Figs/MR_DNAm_to_brain/MR_res.png'))
fig.total


Mendelian Randomisation: CpG -> MDD


Methods

Datasets

  • Exposure: mQTL data from GoDMC (N = 20,396). Cohorts that overlap with PGC29 were removed.

  • Outcome: MDD meta-GWAS (PGC + UKBnoGS + 23andMe, N~=800k)

Instruments

  • In the EWAS of MDD PRS (pT = 5e-8), a total of 725 CpGs were significant after Bonferroni correction.

  • 615 had more than 30 genetic instruments available.

  • 21 CpGs left after ‘clumping’ (window = 250kb, r=0.1)

  • Finally, 25 CpGs with >=5 genetic instruments after data harmonisation entered MR.

MR methods


Results

25 CpGs tested:

Results for IVW, MR-Egger and Weighted median

dat.fig = res %>%
  mutate(ord=1:nrow(.))
  
fig=
  ggplot(dat.fig, aes(x=reorder(exposure,ord), y=-log10(pval), color=method)) + 
  geom_point(position=position_dodge(width = 0.1), stat="identity", size=2) +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    axis.line.x = element_line(size=0.5),
    axis.text=element_text(size=10), axis.title=element_text(size=11),
    plot.title = element_text(lineheight=1, face="bold", vjust=1, hjust=0.5,size=9),
    strip.text = element_text(size=8),
    plot.margin=unit(c(1,1,1,3),'mm')) +
  scale_x_discrete(position='top')+
  geom_hline(yintercept=0,color = "black", size=0.3)+
  geom_hline(yintercept=-log10(0.05),color = "red",linetype='dashed', size=0.3)+
  coord_flip()+
  ylab('P-value for MR')+
  xlab('CpG')
fig

Quality tests for MR

  • MR Steiger directionality: All MR tests were in the correct direction and p values for directionality < 1.61e-135
Quality test 1: Egger intercept
dat.fig = res %>%
  filter(method=='MR Egger') %>%
  mutate(ord=1:nrow(.)) 
  
fig=
  ggplot(dat.fig, aes(x=reorder(exposure,ord), y=-log10(pval.1))) + 
  geom_point(color='tomato',position=position_dodge(width = 0.1), stat="identity", size=2) +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    axis.line.x = element_line(size=0.5),
    axis.text=element_text(size=10), axis.title=element_text(size=11),
    plot.title = element_text(lineheight=1, face="bold", vjust=1, hjust=0.5,size=9),
    strip.text = element_text(size=8),
    plot.margin=unit(c(1,1,1,3),'mm')) +
  scale_x_discrete(position='top')+
  geom_hline(yintercept=0,color = "black", size=0.3)+
  geom_hline(yintercept=-log10(0.05),color = "red",linetype='dashed', size=0.3)+
  coord_flip()+
  ggtitle('MR Egger intercept')+
  ylab('P-value for MR Egger intercept')+
  xlab('CpG')
fig

Quality test 2: Q index (heterogeneity)
dat.fig = res %>%
  filter(method=='Inverse variance weighted') %>%
  mutate(ord=1:nrow(.)) 
  
fig=
  ggplot(dat.fig, aes(x=reorder(exposure,ord), y=-log10(Q_pval))) + 
  geom_point(color='tomato',position=position_dodge(width = 0.1), stat="identity", size=2) +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    axis.line.x = element_line(size=0.5),
    axis.text=element_text(size=10), axis.title=element_text(size=11),
    plot.title = element_text(lineheight=1, face="bold", vjust=1, hjust=0.5,size=9),
    strip.text = element_text(size=8),
    plot.margin=unit(c(1,1,1,3),'mm')) +
  scale_x_discrete(position='top')+
  geom_hline(yintercept=0,color = "black", size=0.3)+
  geom_hline(yintercept=-log10(0.05),color = "red",linetype='dashed', size=0.3)+
  coord_flip()+
  ggtitle('Q test (heterogeneity)')+
  ylab('P-value for Q statistics')+
  xlab('CpG')
fig

Quality test 3: MR results and Egger intercept (horizontal pleiotropy)
dat.fig = res 

fig=
  ggplot(dat.fig, aes(x=-log10(pval.1), y=-log10(pval))) + 
  geom_point(position=position_dodge(width = 0.1), stat="identity", size=2) +
  theme(
    axis.line.x = element_line(size=0.5),
    axis.text=element_text(size=10), axis.title=element_text(size=11),
    plot.title = element_text(lineheight=1, face="bold", vjust=1, hjust=0.5,size=9),
    strip.text = element_text(size=8),
    plot.margin=unit(c(1,1,1,3),'mm')) +
  geom_smooth(method=lm , color="red", se=TRUE)+
  facet_wrap(~method,dir='h', scales = "free_y") +
  ylab('P-value for MR')+
  xlab('P-value for Egger intercept')

fig

Quality test 4: MR results and Q statistics (heterogeneity)
dat.fig = res 

fig=
  ggplot(dat.fig, aes(x=-log10(Q_pval), y=-log10(pval))) + 
  geom_point(position=position_dodge(width = 0.1), stat="identity", size=2) +
  theme(
    axis.line.x = element_line(size=0.5),
    axis.text=element_text(size=10), axis.title=element_text(size=11),
    plot.title = element_text(lineheight=1, face="bold", vjust=1, hjust=0.5,size=9),
    strip.text = element_text(size=8),
    plot.margin=unit(c(1,1,1,3),'mm')) +
  geom_smooth(method=lm , color="red", se=TRUE)+
  facet_wrap(~method,dir='h', scales = "free_y") +
  ylab('P-value for MR')+
  xlab('P-value for Q test')

fig

Quality test 5: MR results and instrumental strength
dat.fig = res 

fig=
  ggplot(dat.fig, aes(x=snp_r2.exposure, y=-log10(pval))) + 
  geom_point(position=position_dodge(width = 0.1), stat="identity", size=2) +
  theme(
    axis.line.x = element_line(size=0.5),
    axis.text=element_text(size=10), axis.title=element_text(size=11),
    plot.title = element_text(lineheight=1, face="bold", vjust=1, hjust=0.5,size=9),
    strip.text = element_text(size=8),
    plot.margin=unit(c(1,1,1,3),'mm')) +
  geom_smooth(method=lm , color="red", se=TRUE)+
  facet_wrap(~method,dir='h', scales = "free_y") +
  ylab('P-value for MR')+
  xlab('SNP R2 for exposure')

fig
## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals

MR - Summary

  • All CpGs were significant for >= 1 MR method

  • 20 significant for >=2 MR methods

  • 9 significant for all three MR methods + 6 CpGs insig for IVW and Weight median and MR-Egger intercept insignificant = 15 valid causal effects from CpG to MDD

  • Positions of these CpGs

# Find CpGs sig for all three methods
# ls.cpg.3mr = res %>%
#   group_by(exposure) %>%
#   count(pval<0.05) %>%
#   filter(`pval < 0.05`==T, n == 3)
# # CpGs sig for IVW and WM
# ls.cpg.2mr.ivw_wm = res %>%
#   filter(method!='MR Egger') %>%
#   group_by(exposure) %>%
#   count(pval<0.05) %>%
#   filter(`pval < 0.05`==T, n == 2)
# # CpGs insig for MR Egger intercept
# ls.cpg.mregger_intercept_null = res %>%
#   filter(method=='MR Egger') %>%
#   filter(pval.1>0.05)
# # CpGs sig for IVW and WM and insig for MR Egger intercept
# ls.cpg.2mr.valid = ls.cpg.2mr.ivw_wm %>%
#   filter(exposure %in% ls.cpg.mregger_intercept_null$exposure)
# # Final list: 3 methods + 2 methods (ivw+wm+Egger intercept null)
# ls.cpg.valid = c(as.character(ls.cpg.3mr$exposure),
#                  as.character(ls.cpg.2mr.valid$exposure)) %>% unique
# 
# m_plot(dat=fig.dat.pT.5e_08,chr="CHR", bp="MAPINFO", snp="CpG", p="P.value",
#         add_category_name=F,fig_size=c(26,13),tophits_annot=F,y_lim=c(0,120),
#         man_annot=ls.cpg.valid)